/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Filter.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.filters;

import weka.core.Capabilities;
import weka.core.CapabilitiesHandler;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Queue;
import weka.core.RelationalLocator;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializedObject;
import weka.core.StringLocator;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.core.Version;
import weka.core.Capabilities.Capability;
import weka.core.converters.ConverterUtils.DataSource;

import java.io.FileOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;

/**
 * An abstract class for instance filters: objects that take instances as input,
 * carry out some transformation on the instance and then output the instance.
 * The method implementations in this class assume that most of the work will be
 * done in the methods overridden by subclasses.
 * <p>
 * 
 * A simple example of filter use. This example doesn't remove instances from
 * the output queue until all instances have been input, so has higher memory
 * consumption than an approach that uses output instances as they are made
 * available:
 * <p>
 * 
 * <code> <pre>
 *  Filter filter = ..some type of filter..
 *  Instances data = ..some instances..
 *  filter.setInputFormat(data);
 *  for (int i = 0; i < data.numInstances(); i++) {
 *    filter.input(data.instance(i));
 *  }
 *  filter.batchFinished();
 *  Instances newData = filter.getOutputFormat();
 *  Instance processed;
 *  while ((processed = filter.output()) != null) {
 *    newData.add(processed);
 *  }
 *  ..do something with newData..
 * </pre> </code>
 * 
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @version $Revision: 6833 $
 */
public abstract class Filter implements Serializable, CapabilitiesHandler,
		RevisionHandler {

	/** Version identifier used for serialization of this class. */
	private static final long serialVersionUID = -8835063755891851218L;

	/**
	 * The output format for instances. Null until setOutputFormat() stores a
	 * format; reset to null by setInputFormat().
	 */
	private Instances m_OutputFormat = null;

	/**
	 * The output instance queue. Filled by push() and replaced with a fresh,
	 * empty queue by setInputFormat(), setOutputFormat(), resetQueue() and by
	 * input() at the start of a new batch.
	 */
	private Queue m_OutputQueue = null;

	/**
	 * Indices of string attributes in the output format (see
	 * initOutputLocators())
	 */
	protected StringLocator m_OutputStringAtts = null;

	/**
	 * Indices of string attributes in the input format (see
	 * initInputLocators())
	 */
	protected StringLocator m_InputStringAtts = null;

	/**
	 * Indices of relational attributes in the output format (see
	 * initOutputLocators())
	 */
	protected RelationalLocator m_OutputRelAtts = null;

	/**
	 * Indices of relational attributes in the input format (see
	 * initInputLocators())
	 */
	protected RelationalLocator m_InputRelAtts = null;

	/**
	 * The input format for instances. Set by setInputFormat(); also holds the
	 * instances buffered through bufferInput() until flushInput() is called.
	 */
	private Instances m_InputFormat = null;

	/**
	 * Record whether the filter is at the start of a batch. Set to true by
	 * setInputFormat() and batchFinished(), cleared by input().
	 */
	protected boolean m_NewBatch = true;

	/**
	 * True if the first batch has been done. Set by batchFinished(), cleared
	 * by setInputFormat().
	 */
	protected boolean m_FirstBatchDone = false;

	/**
	 * Returns true if a new batch was started, i.e. either a new instance of
	 * the filter was created or the batchFinished() method got called.
	 * 
	 * @return true if a new batch has been initiated
	 * @see #m_NewBatch
	 * @see #batchFinished()
	 */
	public boolean isNewBatch() {
		// Within this class the flag is raised on construction, by
		// setInputFormat() and by batchFinished(); input() lowers it again.
		return this.m_NewBatch;
	}

	/**
	 * Returns true if the first batch of instances got processed. Necessary for
	 * supervised filters, which "learn" from the first batch and then shouldn't
	 * get updated with subsequent calls of batchFinished().
	 * 
	 * @return true if the first batch has been processed
	 * @see #m_FirstBatchDone
	 * @see #batchFinished()
	 */
	public boolean isFirstBatchDone() {
		// Raised by batchFinished(); setInputFormat() lowers it again.
		return this.m_FirstBatchDone;
	}

	/**
	 * Returns the Capabilities of this filter. Derived filters have to override
	 * this method to enable capabilities.
	 * 
	 * @return the capabilities of this object
	 * @see Capabilities
	 */
	public Capabilities getCapabilities() {
		// Default: every capability is switched on and no minimum number of
		// instances is demanded.
		final Capabilities caps = new Capabilities(this);
		caps.enableAll();
		caps.setMinimumNumberInstances(0);
		return caps;
	}

	/**
	 * Returns the revision string.
	 * 
	 * @return the revision
	 */
	public String getRevision() {
		// The revision keyword string is handed to RevisionUtils for parsing.
		final String keyword = "$Revision: 6833 $";
		return RevisionUtils.extract(keyword);
	}

	/**
	 * Returns the Capabilities of this filter, customized based on the data.
	 * I.e., it removes all class capabilities except NO_CLASS if there is no
	 * class attribute present, or removes the NO_CLASS capability if there is
	 * a class attribute present.
	 * 
	 * @param data
	 *            the data to use for customization
	 * @return the capabilities of this object, based on the data
	 * @see #getCapabilities()
	 */
	public Capabilities getCapabilities(Instances data) {
		final Capabilities caps = getCapabilities();

		// A class attribute is set: NO_CLASS cannot apply any more.
		if (data.classIndex() != -1) {
			caps.disable(Capability.NO_CLASS);
			caps.disableDependency(Capability.NO_CLASS);
			return caps;
		}

		// No class attribute: switch off every class capability (and its
		// dependency) except NO_CLASS. The iteration runs over the separate
		// object returned by getClassCapabilities() while caps is modified.
		for (Iterator it = caps.getClassCapabilities().capabilities(); it
				.hasNext();) {
			final Capability current = (Capability) it.next();
			if (current == Capability.NO_CLASS) {
				continue;
			}
			caps.disable(current);
			caps.disableDependency(current);
		}

		return caps;
	}

	/**
	 * Sets the format of output instances. The derived class should use this
	 * method once it has determined the outputformat. The output queue is
	 * cleared.
	 * 
	 * @param outputFormat
	 *            the new output format
	 */
	protected void setOutputFormat(Instances outputFormat) {
		if (outputFormat == null) {
			m_OutputFormat = null;
		} else {
			// Store the string-free structure and locate its string/relational
			// attributes (all attributes are considered).
			m_OutputFormat = outputFormat.stringFreeStructure();
			initOutputLocators(m_OutputFormat, null);

			// New relation name: <old name>-<filter class name>, followed by
			// the trimmed option strings if the filter has options.
			final StringBuilder name = new StringBuilder();
			name.append(outputFormat.relationName());
			name.append("-");
			name.append(this.getClass().getName());
			if (this instanceof OptionHandler) {
				for (String option : ((OptionHandler) this).getOptions()) {
					name.append(option.trim());
				}
			}
			m_OutputFormat.setRelationName(name.toString());
		}

		// In either case the output queue starts out empty again.
		m_OutputQueue = new Queue();
	}

	/**
	 * Gets the currently set inputformat instances. This dataset may contain
	 * buffered instances.
	 * 
	 * @return the input Instances.
	 */
	protected Instances getInputFormat() {
		// Hands out the internal dataset itself, not a copy.
		return this.m_InputFormat;
	}

	/**
	 * Returns a reference to the current input format without copying it.
	 * 
	 * @return a reference to the current input format
	 */
	protected Instances inputFormatPeek() {
		// Same reference that getInputFormat() returns; nothing is copied.
		return this.m_InputFormat;
	}

	/**
	 * Returns a reference to the current output format without copying it.
	 * 
	 * @return a reference to the current output format
	 */
	protected Instances outputFormatPeek() {
		// Unlike getOutputFormat() this neither copies nor throws when the
		// format is still null.
		return this.m_OutputFormat;
	}

	/**
	 * Adds an output instance to the queue. The derived class should use this
	 * method for each output instance it makes available.
	 * 
	 * @param instance
	 *            the instance to be added to the queue.
	 */
	protected void push(Instance instance) {
		if (instance == null) {
			return; // null instances are silently ignored
		}

		// An instance that already belongs to a dataset gets its
		// string/relational values copied over to the output format first.
		if (instance.dataset() != null) {
			copyValues(instance, false);
		}

		// Re-home the instance in the output format and queue it.
		instance.setDataset(m_OutputFormat);
		m_OutputQueue.push(instance);
	}

	/**
	 * Clears the output queue.
	 */
	protected void resetQueue() {
		// The old queue is dropped and replaced by a new, empty one.
		this.m_OutputQueue = new Queue();
	}

	/**
	 * Adds the supplied input instance to the inputformat dataset for later
	 * processing. Use this method rather than getInputFormat().add(instance),
	 * since it first copies the instance's string/relational values into the
	 * input format. Note that the provided instance gets copied when buffered.
	 * 
	 * @param instance
	 *            the <code>Instance</code> to buffer.
	 */
	protected void bufferInput(Instance instance) {
		if (instance == null) {
			return; // nothing to buffer
		}

		// Bring string/relational values over to the input format before the
		// instance is appended to it.
		copyValues(instance, true);
		m_InputFormat.add(instance);
	}

	/**
	 * Initializes the input attribute locators. If indices is null then all
	 * attributes of the data will be considered, otherwise only the ones that
	 * were provided.
	 * 
	 * @param data
	 *            the data to initialize the locators with
	 * @param indices
	 *            if not null, the indices to which to restrict the locating
	 */
	protected void initInputLocators(Instances data, int[] indices) {
		// Without explicit indices the locators consider every attribute.
		final boolean allAttributes = (indices == null);

		m_InputStringAtts = allAttributes ? new StringLocator(data)
				: new StringLocator(data, indices);
		m_InputRelAtts = allAttributes ? new RelationalLocator(data)
				: new RelationalLocator(data, indices);
	}

	/**
	 * Initializes the output attribute locators. If indices is null then all
	 * attributes of the data will be considered, otherwise only the ones that
	 * were provided.
	 * 
	 * @param data
	 *            the data to initialize the locators with
	 * @param indices
	 *            if not null, the indices to which to restrict the locating
	 */
	protected void initOutputLocators(Instances data, int[] indices) {
		// Without explicit indices the locators consider every attribute.
		final boolean allAttributes = (indices == null);

		m_OutputStringAtts = allAttributes ? new StringLocator(data)
				: new StringLocator(data, indices);
		m_OutputRelAtts = allAttributes ? new RelationalLocator(data)
				: new RelationalLocator(data, indices);
	}

	/**
	 * Copies the string/relational values contained in the instance to a new
	 * dataset. The Instance must already be assigned to a dataset. This dataset
	 * and the destination dataset must have the same structure.
	 * 
	 * @param instance
	 *            the Instance containing the string/relational values to copy.
	 * @param isInput
	 *            if true the input format and input attribute locators are used
	 *            otherwise the output format and output locators
	 */
	protected void copyValues(Instance instance, boolean isInput) {
		// Pick the destination dataset and its locators once, up front.
		final Instances target;
		final RelationalLocator relAtts;
		final StringLocator stringAtts;
		if (isInput) {
			target = m_InputFormat;
			relAtts = m_InputRelAtts;
			stringAtts = m_InputStringAtts;
		} else {
			target = m_OutputFormat;
			relAtts = m_OutputRelAtts;
			stringAtts = m_OutputStringAtts;
		}

		// Relational values first, then string values.
		RelationalLocator.copyRelationalValues(instance, target, relAtts);
		StringLocator.copyStringValues(instance, target, stringAtts);
	}

	/**
	 * Takes string/relational values referenced by an Instance and copies them
	 * from a source dataset to a destination dataset. The instance references
	 * are updated to be valid for the destination dataset. The instance may
	 * have the structure (i.e. number and attribute position) of either dataset
	 * (this affects where references are obtained from). Only works if the
	 * number of string/relational attributes is the same in both indices
	 * (implicitly these string/relational attributes should be semantically
	 * same but just with shifted positions).
	 * 
	 * @param instance
	 *            the instance containing references to strings/ relational
	 *            values in the source dataset that will have references updated
	 *            to be valid for the destination dataset.
	 * @param instSrcCompat
	 *            true if the instance structure is the same as the source, or
	 *            false if it is the same as the destination (i.e. which of the
	 *            string/relational attribute indices contains the correct
	 *            locations for this instance).
	 * @param srcDataset
	 *            the dataset for which the current instance string/relational
	 *            value references are valid (after any position mapping if
	 *            needed)
	 * @param destDataset
	 *            the dataset for which the current instance string/relational
	 *            value references need to be inserted (after any position
	 *            mapping if needed)
	 */
	protected void copyValues(Instance instance, boolean instSrcCompat,
			Instances srcDataset, Instances destDataset) {

		// Relational values: source positions come from the input locator,
		// destination positions from the output locator.
		RelationalLocator.copyRelationalValues(instance, instSrcCompat,
				srcDataset, m_InputRelAtts, destDataset, m_OutputRelAtts);

		// String values go to the same destination as the relational ones.
		// Previously this call passed getOutputFormat(), i.e. a freshly
		// created copy of the output format, which ignored the destDataset
		// argument and threw if no output format had been set yet.
		StringLocator.copyStringValues(instance, instSrcCompat, srcDataset,
				m_InputStringAtts, destDataset, m_OutputStringAtts);
	}

	/**
	 * This will remove all buffered instances from the inputformat dataset. Use
	 * this method rather than getInputFormat().delete();
	 */
	protected void flushInput() {

		if ((m_InputStringAtts.getAttributeIndices().length > 0)
				|| (m_InputRelAtts.getAttributeIndices().length > 0)) {
			// With string/relational attributes present, the buffered dataset
			// is replaced by its string-free structure instead of merely
			// deleting the rows.
			m_InputFormat = m_InputFormat.stringFreeStructure();

			// m_InputFormat is a new object now, so BOTH locators have to be
			// rebuilt against it (keeping their previous index restriction).
			// The relational locator used to be left pointing at the
			// discarded dataset.
			m_InputStringAtts = new StringLocator(m_InputFormat,
					m_InputStringAtts.getAllowedIndices());
			m_InputRelAtts = new RelationalLocator(m_InputFormat,
					m_InputRelAtts.getAllowedIndices());
		} else {
			// This is more efficient than new Instances(m_InputFormat, 0);
			m_InputFormat.delete();
		}
	}

	/**
	 * tests the data whether the filter can actually handle it
	 * 
	 * @param instanceInfo
	 *            the data to test
	 * @throws Exception
	 *             if the test fails
	 */
	protected void testInputFormat(Instances instanceInfo) throws Exception {
		// Use the capabilities tailored to this data (class attribute
		// present or not) and let them throw if the data is not supported.
		final Capabilities tailored = getCapabilities(instanceInfo);
		tailored.testWithFail(instanceInfo);
	}

	/**
	 * Sets the format of the input instances. If the filter is able to
	 * determine the output format before seeing any input instances, it does so
	 * here. This default implementation clears the output format and output
	 * queue, and the new batch flag is set. Overriders should call
	 * <code>super.setInputFormat(Instances)</code>
	 * 
	 * @param instanceInfo
	 *            an Instances object containing the input instance structure
	 *            (any instances contained in the object are ignored - only the
	 *            structure is required).
	 * @return true if the outputFormat may be collected immediately
	 * @throws Exception
	 *             if the inputFormat can't be set successfully
	 */
	public boolean setInputFormat(Instances instanceInfo) throws Exception {
		// Reject unsupported data before any state is touched.
		testInputFormat(instanceInfo);

		// Only the (string-free) structure of the supplied data is stored.
		m_InputFormat = instanceInfo.stringFreeStructure();

		// Start from scratch: a new batch begins, the first batch is not done,
		// and there is neither an output format nor any queued output.
		m_NewBatch = true;
		m_FirstBatchDone = false;
		m_OutputFormat = null;
		m_OutputQueue = new Queue();

		// Locate string/relational attributes among all input attributes.
		initInputLocators(m_InputFormat, null);

		// This default implementation never has an output format ready here.
		return false;
	}

	/**
	 * Gets the format of the output instances. This should only be called after
	 * input() or batchFinished() has returned true. The relation name of the
	 * output instances should be changed to reflect the action of the filter
	 * (eg: add the filter name and options).
	 * 
	 * @return an Instances object containing the output instance structure
	 *         only.
	 * @throws NullPointerException
	 *             if no input structure has been defined (or the output format
	 *             hasn't been determined yet)
	 */
	public Instances getOutputFormat() {
		final Instances format = m_OutputFormat;
		if (format != null) {
			// Callers get their own empty dataset built from the stored format.
			return new Instances(format, 0);
		}
		throw new NullPointerException("No output format defined.");
	}

	/**
	 * Input an instance for filtering. Ordinarily the instance is processed and
	 * made available for output immediately. Some filters require all instances
	 * be read before producing output, in which case output instances should be
	 * collected after calling batchFinished(). If the input marks the start of
	 * a new batch, the output queue is cleared. This default implementation
	 * assumes all instance conversion will occur when batchFinished() is
	 * called.
	 * 
	 * @param instance
	 *            the input instance
	 * @return true if the filtered instance may now be collected with output().
	 * @throws NullPointerException
	 *             if the input format has not been defined.
	 * @throws Exception
	 *             if the input instance was not of the correct format or if
	 *             there was a problem with the filtering.
	 */
	public boolean input(Instance instance) throws Exception {
		if (m_InputFormat == null) {
			throw new NullPointerException("No input instance format defined");
		}

		// The first instance of a batch discards whatever output is still
		// queued and marks the batch as running.
		final boolean batchStart = m_NewBatch;
		if (batchStart) {
			m_OutputQueue = new Queue();
			m_NewBatch = false;
		}

		// The default implementation only buffers; nothing is available for
		// output yet, hence false.
		bufferInput(instance);
		return false;
	}

	/**
	 * Signify that this batch of input to the filter is finished. If the filter
	 * requires all instances prior to filtering, output() may now be called to
	 * retrieve the filtered instances. Any subsequent instances filtered should
	 * be filtered based on settings obtained from the first batch (unless the
	 * inputFormat has been re-assigned or new options have been set). This
	 * default implementation assumes all instance processing occurs during
	 * setInputFormat() and input().
	 * 
	 * @return true if there are instances pending output
	 * @throws NullPointerException
	 *             if no input structure has been defined,
	 * @throws Exception
	 *             if there was a problem finishing the batch.
	 */
	public boolean batchFinished() throws Exception {

		if (m_InputFormat == null) {
			throw new NullPointerException("No input instance format defined");
		}

		// Drop the buffered input and mark the end of the batch.
		flushInput();
		m_NewBatch = true;
		m_FirstBatchDone = true;

		// Clear out references to old strings/relationals occasionally. This
		// is only done while nothing is queued, because push() attaches queued
		// instances to the current output format object. If no output format
		// has been set, the cleanup is skipped so that numPendingOutput()
		// below raises its descriptive NullPointerException.
		if ((m_OutputFormat != null) && m_OutputQueue.empty()) {
			if ((m_OutputStringAtts.getAttributeIndices().length > 0)
					|| (m_OutputRelAtts.getAttributeIndices().length > 0)) {
				m_OutputFormat = m_OutputFormat.stringFreeStructure();

				// m_OutputFormat is a new object now, so BOTH locators are
				// rebuilt against it; the relational locator used to be left
				// pointing at the discarded dataset.
				m_OutputStringAtts = new StringLocator(m_OutputFormat,
						m_OutputStringAtts.getAllowedIndices());
				m_OutputRelAtts = new RelationalLocator(m_OutputFormat,
						m_OutputRelAtts.getAllowedIndices());
			}
		}

		return (numPendingOutput() != 0);
	}

	/**
	 * Output an instance after filtering and remove from the output queue.
	 * 
	 * @return the instance that has most recently been filtered (or null if the
	 *         queue is empty).
	 * @throws NullPointerException
	 *             if no output structure has been defined
	 */
	public Instance output() {
		if (m_OutputFormat == null) {
			throw new NullPointerException("No output instance format defined");
		}

		// An empty queue is reported as null; otherwise the next queued
		// instance is removed from the queue and returned.
		return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.pop();
	}

	/**
	 * Output an instance after filtering but do not remove from the output
	 * queue.
	 * 
	 * @return the instance that has most recently been filtered (or null if the
	 *         queue is empty).
	 * @throws NullPointerException
	 *             if no output structure has been defined
	 */
	public Instance outputPeek() {
		if (m_OutputFormat == null) {
			throw new NullPointerException("No output instance format defined");
		}

		// Same as output(), except that the instance stays in the queue.
		return m_OutputQueue.empty() ? null : (Instance) m_OutputQueue.peek();
	}

	/**
	 * Returns the number of instances pending output
	 * 
	 * @return the number of instances pending output
	 * @throws NullPointerException
	 *             if no output structure has been defined
	 */
	public int numPendingOutput() {
		// The queue size is only reported once an output format exists.
		if (m_OutputFormat != null) {
			return m_OutputQueue.size();
		}
		throw new NullPointerException("No output instance format defined");
	}

	/**
	 * Returns whether the output format is ready to be collected
	 * 
	 * @return true if the output format is set
	 */
	public boolean isOutputFormatDefined() {
		// Defined as soon as a non-null output format has been stored.
		final boolean defined = (this.m_OutputFormat != null);
		return defined;
	}

	/**
	 * Creates a deep copy of the given filter using serialization.
	 * 
	 * @param model
	 *            the filter to copy
	 * @return a deep copy of the filter
	 * @throws Exception
	 *             if an error occurs
	 */
	public static Filter makeCopy(Filter model) throws Exception {
		// Wrap the filter in a SerializedObject and read a copy back from it.
		final SerializedObject snapshot = new SerializedObject(model);
		return (Filter) snapshot.getObject();
	}

	/**
	 * Creates a given number of deep copies of the given filter using
	 * serialization.
	 * 
	 * @param model
	 *            the filter to copy
	 * @param num
	 *            the number of filter copies to create.
	 * @return an array of filters.
	 * @throws Exception
	 *             if an error occurs
	 */
	public static Filter[] makeCopies(Filter model, int num) throws Exception {
		if (model == null) {
			throw new Exception("No model filter set");
		}

		final Filter[] copies = new Filter[num];

		// The model is wrapped once; one copy is read back per array slot.
		final SerializedObject serialized = new SerializedObject(model);
		int slot = 0;
		while (slot < copies.length) {
			copies[slot] = (Filter) serialized.getObject();
			slot++;
		}
		return copies;
	}

	/**
	 * Filters an entire set of instances through a filter and returns the new
	 * set.
	 * 
	 * @param data
	 *            the data to be filtered
	 * @param filter
	 *            the filter to be used
	 * @return the filtered set of data
	 * @throws Exception
	 *             if the filter can't be used successfully
	 */
	public static Instances useFilter(Instances data, Filter filter)
			throws Exception {
		// Push the complete dataset through the filter as a single batch.
		int index = 0;
		while (index < data.numInstances()) {
			filter.input(data.instance(index));
			index++;
		}
		filter.batchFinished();

		// Collect everything the filter has queued into an empty dataset
		// that carries the filter's output format.
		final Instances filtered = filter.getOutputFormat();
		for (Instance next = filter.output(); next != null; next = filter
				.output()) {
			filtered.add(next);
		}
		return filtered;
	}

	/**
	 * Returns a description of the filter, by default only the classname.
	 * 
	 * @return a string describing the filter
	 */
	public String toString() {
		// The fully qualified name of the concrete filter class.
		final Class concrete = getClass();
		return concrete.getName();
	}

	/**
	 * generates source code from the filter
	 * 
	 * @param filter
	 *            the filter to output as source
	 * @param className
	 *            the name of the generated class
	 * @param input
	 *            the input data the header is generated for
	 * @param output
	 *            the output data the header is generated for
	 * @return the generated source code
	 * @throws Exception
	 *             if source code cannot be generated
	 */
	public static String wekaStaticWrapper(Sourcable filter, String className,
			Instances input, Instances output) throws Exception {

		// Builds the text of a Java compilation unit in three parts:
		// 1. a header comment plus package/import statements,
		// 2. a class "WekaWrapper extends Filter" that converts between
		// Instance objects and Object arrays and delegates the actual work
		// to static methods <className>.filter(...),
		// 3. the code returned by filter.toSource(className, input).
		// The wrapper class name "WekaWrapper" and the package
		// "weka.filters" are fixed; only the delegate name is configurable.

		StringBuffer result;
		int i;
		int n;

		result = new StringBuffer();

		// file header: Weka version, timestamp and input relation name
		result.append("// Generated with Weka " + Version.VERSION + "\n");
		result.append("//\n");
		result.append("// This code is public domain and comes with no warranty.\n");
		result.append("//\n");
		result.append("// Timestamp: " + new Date() + "\n");
		result.append("// Relation: " + input.relationName() + "\n");
		result.append("\n");

		// package, imports and class declaration of the wrapper
		result.append("package weka.filters;\n");
		result.append("\n");
		result.append("import weka.core.Attribute;\n");
		result.append("import weka.core.Capabilities;\n");
		result.append("import weka.core.Capabilities.Capability;\n");
		result.append("import weka.core.FastVector;\n");
		result.append("import weka.core.Instance;\n");
		result.append("import weka.core.Instances;\n");
		result.append("import weka.filters.Filter;\n");
		result.append("\n");
		result.append("public class WekaWrapper\n");
		result.append("  extends Filter {\n");

		// globalInfo
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Returns only the toString() method.\n");
		result.append("   *\n");
		result.append("   * @return a string describing the filter\n");
		result.append("   */\n");
		result.append("  public String globalInfo() {\n");
		result.append("    return toString();\n");
		result.append("  }\n");

		// getCapabilities: the body is the source form of the capabilities of
		// the supplied filter. The cast requires the Sourcable to be a Filter;
		// anything else fails here with a ClassCastException.
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Returns the capabilities of this filter.\n");
		result.append("   *\n");
		result.append("   * @return the capabilities\n");
		result.append("   */\n");
		result.append("  public Capabilities getCapabilities() {\n");
		result.append(((Filter) filter).getCapabilities().toSource("result", 4));
		result.append("    return result;\n");
		result.append("  }\n");

		// objectsToInstance: generated helper that maps null to a missing
		// value, numeric attributes to the Double in the array and nominal
		// attributes to the index of the label
		result.append("\n");
		result.append("  /**\n");
		result.append("   * turns array of Objects into an Instance object\n");
		result.append("   *\n");
		result.append("   * @param obj	the Object array to turn into an Instance\n");
		result.append("   * @param format	the data format to use\n");
		result.append("   * @return		the generated Instance object\n");
		result.append("   */\n");
		result.append("  protected Instance objectsToInstance(Object[] obj, Instances format) {\n");
		result.append("    Instance		result;\n");
		result.append("    double[]		values;\n");
		result.append("    int		i;\n");
		result.append("\n");
		result.append("    values = new double[obj.length];\n");
		result.append("\n");
		result.append("    for (i = 0 ; i < obj.length; i++) {\n");
		result.append("      if (obj[i] == null)\n");
		result.append("        values[i] = Instance.missingValue();\n");
		result.append("      else if (format.attribute(i).isNumeric())\n");
		result.append("        values[i] = (Double) obj[i];\n");
		result.append("      else if (format.attribute(i).isNominal())\n");
		result.append("        values[i] = format.attribute(i).indexOfValue((String) obj[i]);\n");
		result.append("    }\n");
		result.append("\n");
		result.append("    // create new instance\n");
		result.append("    result = new Instance(1.0, values);\n");
		result.append("    result.setDataset(format);\n");
		result.append("\n");
		result.append("    return result;\n");
		result.append("  }\n");

		// instanceToObjects: generated helper for the opposite direction
		// (missing -> null, numeric -> value, everything else -> string value)
		result.append("\n");
		result.append("  /**\n");
		result.append("   * turns the Instance object into an array of Objects\n");
		result.append("   *\n");
		result.append("   * @param inst	the instance to turn into an array\n");
		result.append("   * @return		the Object array representing the instance\n");
		result.append("   */\n");
		result.append("  protected Object[] instanceToObjects(Instance inst) {\n");
		result.append("    Object[]	result;\n");
		result.append("    int		i;\n");
		result.append("\n");
		result.append("    result = new Object[inst.numAttributes()];\n");
		result.append("\n");
		result.append("    for (i = 0 ; i < inst.numAttributes(); i++) {\n");
		result.append("      if (inst.isMissing(i))\n");
		result.append("  	result[i] = null;\n");
		result.append("      else if (inst.attribute(i).isNumeric())\n");
		result.append("  	result[i] = inst.value(i);\n");
		result.append("      else\n");
		result.append("  	result[i] = inst.stringValue(i);\n");
		result.append("    }\n");
		result.append("\n");
		result.append("    return result;\n");
		result.append("  }\n");

		// instancesToObjects: generated helper that converts a whole dataset
		// row by row via instanceToObjects
		result.append("\n");
		result.append("  /**\n");
		result.append("   * turns the Instances object into an array of Objects\n");
		result.append("   *\n");
		result.append("   * @param data	the instances to turn into an array\n");
		result.append("   * @return		the Object array representing the instances\n");
		result.append("   */\n");
		result.append("  protected Object[][] instancesToObjects(Instances data) {\n");
		result.append("    Object[][]	result;\n");
		result.append("    int		i;\n");
		result.append("\n");
		result.append("    result = new Object[data.numInstances()][];\n");
		result.append("\n");
		result.append("    for (i = 0; i < data.numInstances(); i++)\n");
		result.append("      result[i] = instanceToObjects(data.instance(i));\n");
		result.append("\n");
		result.append("    return result;\n");
		result.append("  }\n");

		// setInputFormat: the generated method rebuilds the header of
		// 'output' attribute by attribute and installs it as output format
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Only tests the input data.\n");
		result.append("   *\n");
		result.append("   * @param instanceInfo the format of the data to convert\n");
		result.append("   * @return always true, to indicate that the output format can \n");
		result.append("   *         be collected immediately.\n");
		result.append("   */\n");
		result.append("  public boolean setInputFormat(Instances instanceInfo) throws Exception {\n");
		result.append("    super.setInputFormat(instanceInfo);\n");
		result.append("    \n");
		result.append("    // generate output format\n");
		result.append("    FastVector atts = new FastVector();\n");
		result.append("    FastVector attValues;\n");
		// Only numeric and nominal output attributes can be expressed; any
		// other type aborts the generation with an exception.
		// NOTE(review): attribute names, nominal labels and the relation name
		// are inserted into string literals as-is, without escaping quotes or
		// backslashes - confirm that callers only pass "safe" names.
		for (i = 0; i < output.numAttributes(); i++) {
			result.append("    // " + output.attribute(i).name() + "\n");
			if (output.attribute(i).isNumeric()) {
				result.append("    atts.addElement(new Attribute(\""
						+ output.attribute(i).name() + "\"));\n");
			} else if (output.attribute(i).isNominal()) {
				result.append("    attValues = new FastVector();\n");
				for (n = 0; n < output.attribute(i).numValues(); n++) {
					result.append("    attValues.addElement(\""
							+ output.attribute(i).value(n) + "\");\n");
				}
				result.append("    atts.addElement(new Attribute(\""
						+ output.attribute(i).name() + "\", attValues));\n");
			} else {
				throw new UnsupportedAttributeTypeException("Attribute type '"
						+ output.attribute(i).type() + "' (position " + (i + 1)
						+ ") is not supported!");
			}
		}
		result.append("    \n");
		result.append("    Instances format = new Instances(\""
				+ output.relationName() + "\", atts, 0);\n");
		result.append("    format.setClassIndex(" + output.classIndex()
				+ ");\n");
		result.append("    setOutputFormat(format);\n");
		result.append("    \n");
		result.append("    return true;\n");
		result.append("  }\n");

		// input: the generated method filters a single instance through
		// <className>.filter(Object[]) and queues the result right away
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Directly filters the instance.\n");
		result.append("   *\n");
		result.append("   * @param instance the instance to convert\n");
		result.append("   * @return always true, to indicate that the output can \n");
		result.append("   *         be collected immediately.\n");
		result.append("   */\n");
		result.append("  public boolean input(Instance instance) throws Exception {\n");
		result.append("    Object[] filtered = " + className
				+ ".filter(instanceToObjects(instance));\n");
		result.append("    push(objectsToInstance(filtered, getOutputFormat()));\n");
		result.append("    return true;\n");
		result.append("  }\n");

		// batchFinished: the generated method filters whatever is buffered in
		// the input format through <className>.filter(Object[][]), then
		// flushes the input and sets the batch flags
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Performs a batch filtering of the buffered data, if any available.\n");
		result.append("   *\n");
		result.append("   * @return true if instances were filtered otherwise false\n");
		result.append("   */\n");
		result.append("  public boolean batchFinished() throws Exception {\n");
		result.append("    if (getInputFormat() == null)\n");
		result.append("      throw new NullPointerException(\"No input instance format defined\");;\n");
		result.append("\n");
		result.append("    Instances inst = getInputFormat();\n");
		result.append("    if (inst.numInstances() > 0) {\n");
		result.append("      Object[][] filtered = " + className
				+ ".filter(instancesToObjects(inst));\n");
		result.append("      for (int i = 0; i < filtered.length; i++) {\n");
		result.append("        push(objectsToInstance(filtered[i], getOutputFormat()));\n");
		result.append("      }\n");
		result.append("    }\n");
		result.append("\n");
		result.append("    flushInput();\n");
		result.append("    m_NewBatch = true;\n");
		result.append("    m_FirstBatchDone = true;\n");
		result.append("\n");
		result.append("    return (inst.numInstances() > 0);\n");
		result.append("  }\n");

		// toString: names the class of the original filter, the Weka version
		// used for generation and the wrapper/delegate class names
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Returns only the classnames and what filter it is based on.\n");
		result.append("   *\n");
		result.append("   * @return a short description\n");
		result.append("   */\n");
		result.append("  public String toString() {\n");
		result.append("    return \"Auto-generated filter wrapper, based on "
				+ filter.getClass().getName() + " (generated with Weka "
				+ Version.VERSION + ").\\n"
				+ "\" + this.getClass().getName() + \"/" + className + "\";\n");
		result.append("  }\n");

		// main: entry point of the generated class; closes the wrapper class
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Runs the filter from commandline.\n");
		result.append("   *\n");
		result.append("   * @param args the commandline arguments\n");
		result.append("   */\n");
		result.append("  public static void main(String args[]) {\n");
		result.append("    runFilter(new WekaWrapper(), args);\n");
		result.append("  }\n");
		result.append("}\n");

		// actual filter code, as produced by the Sourcable itself, appended
		// after the wrapper class
		result.append("\n");
		result.append(filter.toSource(className, input));

		return result.toString();
	}

	/**
	 * Method for testing filters. Parses the general commandline options,
	 * hands the remaining options to the filter (if it is an OptionHandler),
	 * then streams all instances of the input through the filter and prints
	 * the output format followed by the filtered instances.
	 * <p>
	 * If no output file is given, the result is written to stdout. In that
	 * case stdout is only flushed, never closed, so that source code
	 * requested via -z can still be printed afterwards. An output file opened
	 * by this method is always closed, even if the filter fails.
	 * 
	 * @param filter
	 *            the filter to use
	 * @param options
	 *            should contain the following arguments: <br/>
	 *            -i input_file <br/>
	 *            -o output_file <br/>
	 *            -c class_index <br/>
	 *            -z classname (for filters implementing weka.filters.Sourcable) <br/>
	 *            -d to print debugging information to stderr <br/>
	 *            or -h for help on options
	 * @throws Exception
	 *             if something goes wrong or the user requests help on command
	 *             options; failures during option handling are rethrown with
	 *             the list of valid options appended to the message and the
	 *             original exception attached as cause
	 */
	public static void filterFile(Filter filter, String[] options)
			throws Exception {

		boolean debug = false;
		Instances data = null;
		DataSource input = null;
		PrintWriter output = null;
		// true only if "output" wraps a file opened by this method
		boolean ownsOutput = false;
		boolean helpRequest;
		String sourceCode = "";

		try {
			helpRequest = Utils.getFlag('h', options);

			if (Utils.getFlag('d', options)) {
				debug = true;
			}
			String infileName = Utils.getOption('i', options);
			String outfileName = Utils.getOption('o', options);
			String classIndex = Utils.getOption('c', options);
			if (filter instanceof Sourcable)
				sourceCode = Utils.getOption('z', options);

			// general options have to be consumed before the filter sees the
			// rest, otherwise the check for left-over options below would fail
			if (filter instanceof OptionHandler) {
				((OptionHandler) filter).setOptions(options);
			}

			Utils.checkForRemainingOptions(options);
			if (helpRequest) {
				throw new Exception("Help requested.\n");
			}
			if (infileName.length() != 0) {
				input = new DataSource(infileName);
			} else {
				input = new DataSource(System.in);
			}
			if (outfileName.length() != 0) {
				output = new PrintWriter(new FileOutputStream(outfileName));
				ownsOutput = true;
			} else {
				output = new PrintWriter(System.out);
			}

			data = input.getStructure();
			if (classIndex.length() != 0) {
				if (classIndex.equals("first")) {
					data.setClassIndex(0);
				} else if (classIndex.equals("last")) {
					data.setClassIndex(data.numAttributes() - 1);
				} else {
					// the commandline index is 1-based
					data.setClassIndex(Integer.parseInt(classIndex) - 1);
				}
			}
		} catch (Exception ex) {
			// don't leak the output file if setup failed after opening it
			if (ownsOutput) {
				output.close();
			}

			String filterOptions = "";
			// Output the error and also the valid options
			if (filter instanceof OptionHandler) {
				filterOptions += "\nFilter options:\n\n";
				Enumeration enu = ((OptionHandler) filter).listOptions();
				while (enu.hasMoreElements()) {
					Option option = (Option) enu.nextElement();
					filterOptions += option.synopsis() + '\n'
							+ option.description() + "\n";
				}
			}

			String genericOptions = "\nGeneral options:\n\n"
					+ "-h\n"
					+ "\tGet help on available options.\n"
					+ "\t(use -b -h for help on batch mode.)\n"
					+ "-i <file>\n"
					+ "\tThe name of the file containing input instances.\n"
					+ "\tIf not supplied then instances will be read from stdin.\n"
					+ "-o <file>\n"
					+ "\tThe name of the file output instances will be written to.\n"
					+ "\tIf not supplied then instances will be written to stdout.\n"
					+ "-c <class index>\n"
					+ "\tThe number of the attribute to use as the class.\n"
					+ "\t\"first\" and \"last\" are also valid entries.\n"
					+ "\tIf not supplied then no class is assigned.\n";

			if (filter instanceof Sourcable) {
				genericOptions += "-z <class name>\n"
						+ "\tOutputs the source code representing the trained filter.\n";
			}

			// keep the original exception as cause for easier debugging
			throw new Exception('\n' + ex.getMessage() + filterOptions
					+ genericOptions, ex);
		}

		try {
			if (debug) {
				System.err.println("Setting input format");
			}
			boolean printedHeader = false;
			if (filter.setInputFormat(data)) {
				if (debug) {
					System.err.println("Getting output format");
				}
				output.println(filter.getOutputFormat().toString());
				printedHeader = true;
			}

			// Pass all the instances to the filter
			Instance inst;
			while (input.hasMoreElements(data)) {
				inst = input.nextElement(data);
				if (debug) {
					System.err.println("Input instance to filter");
				}
				if (filter.input(inst)) {
					if (debug) {
						System.err.println("Filter said collect immediately");
					}
					if (!printedHeader) {
						throw new Error(
								"Filter didn't return true from setInputFormat() "
										+ "earlier!");
					}
					if (debug) {
						System.err.println("Getting output instance");
					}
					output.println(filter.output().toString());
				}
			}

			// Say that input has finished, and print any pending output
			// instances
			if (debug) {
				System.err.println("Setting end of batch");
			}
			if (filter.batchFinished()) {
				if (debug) {
					System.err.println("Filter said collect output");
				}
				if (!printedHeader) {
					if (debug) {
						System.err.println("Getting output format");
					}
					output.println(filter.getOutputFormat().toString());
				}
				if (debug) {
					System.err.println("Getting output instance");
				}
				while (filter.numPendingOutput() > 0) {
					output.println(filter.output().toString());
					if (debug) {
						System.err.println("Getting output instance");
					}
				}
			}
			if (debug) {
				System.err.println("Done");
			}
		} finally {
			if (ownsOutput) {
				output.close();
			} else {
				// closing the writer would close System.out as well and
				// swallow the source code printed below
				output.flush();
			}
		}

		if (sourceCode.length() != 0)
			System.out.println(wekaStaticWrapper((Sourcable) filter,
					sourceCode, data, filter.getOutputFormat()));
	}

	/**
	 * Method for testing filters ability to process multiple batches. The
	 * first input is pushed through the filter as the first batch, then the
	 * second input is pushed through the same filter as a second batch.
	 * <p>
	 * If an output file is omitted, the corresponding batch is written to
	 * stdout. Stdout is only flushed, never closed, so that both batches (and
	 * source code requested via -z) can share it. Output files opened by this
	 * method are always closed, even if the filter fails.
	 * 
	 * @param filter
	 *            the filter to use
	 * @param options
	 *            should contain the following arguments: <br/>
	 *            -i (first) input file <br/>
	 *            -o (first) output file <br/>
	 *            -r (second) input file <br/>
	 *            -s (second) output file <br/>
	 *            -c class_index <br/>
	 *            -z classname (for filters implementing weka.filters.Sourcable) <br/>
	 *            or -h for help on options
	 * @throws Exception
	 *             if something goes wrong or the user requests help on command
	 *             options; failures during option handling are rethrown with
	 *             the list of valid options appended to the message and the
	 *             original exception attached as cause
	 */
	public static void batchFilterFile(Filter filter, String[] options)
			throws Exception {

		Instances firstData = null;
		Instances secondData = null;
		DataSource firstInput = null;
		DataSource secondInput = null;
		PrintWriter firstOutput = null;
		PrintWriter secondOutput = null;
		// true only if the respective writer wraps a file opened by this method
		boolean ownsFirstOutput = false;
		boolean ownsSecondOutput = false;
		boolean helpRequest;
		String sourceCode = "";

		try {
			helpRequest = Utils.getFlag('h', options);

			String fileName = Utils.getOption('i', options);
			if (fileName.length() != 0) {
				firstInput = new DataSource(fileName);
			} else {
				throw new Exception("No first input file given.\n");
			}

			fileName = Utils.getOption('r', options);
			if (fileName.length() != 0) {
				secondInput = new DataSource(fileName);
			} else {
				throw new Exception("No second input file given.\n");
			}

			fileName = Utils.getOption('o', options);
			if (fileName.length() != 0) {
				firstOutput = new PrintWriter(new FileOutputStream(fileName));
				ownsFirstOutput = true;
			} else {
				firstOutput = new PrintWriter(System.out);
			}

			fileName = Utils.getOption('s', options);
			if (fileName.length() != 0) {
				secondOutput = new PrintWriter(new FileOutputStream(fileName));
				ownsSecondOutput = true;
			} else {
				secondOutput = new PrintWriter(System.out);
			}
			String classIndex = Utils.getOption('c', options);
			if (filter instanceof Sourcable)
				sourceCode = Utils.getOption('z', options);

			// general options have to be consumed before the filter sees the
			// rest, otherwise the check for left-over options below would fail
			if (filter instanceof OptionHandler) {
				((OptionHandler) filter).setOptions(options);
			}
			Utils.checkForRemainingOptions(options);

			if (helpRequest) {
				throw new Exception("Help requested.\n");
			}
			firstData = firstInput.getStructure();
			secondData = secondInput.getStructure();
			if (!secondData.equalHeaders(firstData)) {
				throw new Exception("Input file formats differ.\n");
			}
			if (classIndex.length() != 0) {
				if (classIndex.equals("first")) {
					firstData.setClassIndex(0);
					secondData.setClassIndex(0);
				} else if (classIndex.equals("last")) {
					firstData.setClassIndex(firstData.numAttributes() - 1);
					secondData.setClassIndex(secondData.numAttributes() - 1);
				} else {
					// the commandline index is 1-based
					int index = Integer.parseInt(classIndex) - 1;
					firstData.setClassIndex(index);
					secondData.setClassIndex(index);
				}
			}
		} catch (Exception ex) {
			// don't leak output files if setup failed after opening them
			if (ownsFirstOutput) {
				firstOutput.close();
			}
			if (ownsSecondOutput) {
				secondOutput.close();
			}

			String filterOptions = "";
			// Output the error and also the valid options
			if (filter instanceof OptionHandler) {
				filterOptions += "\nFilter options:\n\n";
				Enumeration enu = ((OptionHandler) filter).listOptions();
				while (enu.hasMoreElements()) {
					Option option = (Option) enu.nextElement();
					filterOptions += option.synopsis() + '\n'
							+ option.description() + "\n";
				}
			}

			String genericOptions = "\nGeneral options:\n\n"
					+ "-h\n"
					+ "\tGet help on available options.\n"
					+ "-i <filename>\n"
					+ "\tThe file containing first input instances.\n"
					+ "-o <filename>\n"
					+ "\tThe file first output instances will be written to.\n"
					+ "-r <filename>\n"
					+ "\tThe file containing second input instances.\n"
					+ "-s <filename>\n"
					+ "\tThe file second output instances will be written to.\n"
					+ "-c <class index>\n"
					+ "\tThe number of the attribute to use as the class.\n"
					+ "\t\"first\" and \"last\" are also valid entries.\n"
					+ "\tIf not supplied then no class is assigned.\n";

			if (filter instanceof Sourcable) {
				genericOptions += "-z <class name>\n"
						+ "\tOutputs the source code representing the trained filter.\n";
			}

			// keep the original exception as cause for easier debugging
			throw new Exception('\n' + ex.getMessage() + filterOptions
					+ genericOptions, ex);
		}

		Instance inst;
		boolean printedHeader;
		try {
			// ----- first batch -----
			try {
				printedHeader = false;
				if (filter.setInputFormat(firstData)) {
					firstOutput.println(filter.getOutputFormat().toString());
					printedHeader = true;
				}

				// Pass all the instances to the filter
				while (firstInput.hasMoreElements(firstData)) {
					inst = firstInput.nextElement(firstData);
					if (filter.input(inst)) {
						if (!printedHeader) {
							throw new Error(
									"Filter didn't return true from setInputFormat() "
											+ "earlier!");
						}
						firstOutput.println(filter.output().toString());
					}
				}

				// Say that input has finished, and print any pending output
				// instances
				if (filter.batchFinished()) {
					if (!printedHeader) {
						firstOutput.println(filter.getOutputFormat()
								.toString());
					}
					while (filter.numPendingOutput() > 0) {
						firstOutput.println(filter.output().toString());
					}
				}
			} finally {
				if (ownsFirstOutput) {
					firstOutput.close();
				} else {
					// closing the writer would close System.out as well and
					// swallow everything that is printed to stdout later on
					firstOutput.flush();
				}
			}

			// ----- second batch -----
			printedHeader = false;
			if (filter.isOutputFormatDefined()) {
				secondOutput.println(filter.getOutputFormat().toString());
				printedHeader = true;
			}
			// Pass all the second instances to the filter
			while (secondInput.hasMoreElements(secondData)) {
				inst = secondInput.nextElement(secondData);
				if (filter.input(inst)) {
					if (!printedHeader) {
						throw new Error("Filter didn't return true from"
								+ " isOutputFormatDefined() earlier!");
					}
					secondOutput.println(filter.output().toString());
				}
			}

			// Say that input has finished, and print any pending output
			// instances
			if (filter.batchFinished()) {
				if (!printedHeader) {
					secondOutput.println(filter.getOutputFormat().toString());
				}
				while (filter.numPendingOutput() > 0) {
					secondOutput.println(filter.output().toString());
				}
			}
		} finally {
			if (ownsSecondOutput) {
				secondOutput.close();
			} else {
				// keep System.out open for the source code printed below
				secondOutput.flush();
			}
		}

		if (sourceCode.length() != 0)
			System.out.println(wekaStaticWrapper((Sourcable) filter,
					sourceCode, firstData, filter.getOutputFormat()));
	}

	/**
	 * Runs the given filter with the given commandline options. If the -b
	 * flag is present, the filter is run in batch mode via
	 * {@link #batchFilterFile(Filter, String[])}, otherwise via
	 * {@link #filterFile(Filter, String[])}.
	 * <p>
	 * Exceptions are not propagated: if the textual representation of the
	 * exception contains "Help requested" or "Filter options", only its
	 * message is printed to stderr, otherwise the full stack trace is printed.
	 * 
	 * @param filter
	 *            the filter to run
	 * @param options
	 *            the commandline options
	 */
	protected static void runFilter(Filter filter, String[] options) {
		try {
			boolean batchMode = Utils.getFlag('b', options);
			if (batchMode) {
				batchFilterFile(filter, options);
			} else {
				filterFile(filter, options);
			}
		} catch (Exception e) {
			String description = e.toString();
			// usage-related failures already carry a readable message, so the
			// stack trace is omitted for them
			boolean usageRelated = (description.indexOf("Help requested") != -1)
					|| (description.indexOf("Filter options") != -1);
			if (usageRelated) {
				System.err.println(e.getMessage());
			} else {
				e.printStackTrace();
			}
		}
	}

	/**
	 * Main method for testing this class. The first argument is taken as the
	 * class name of the filter to instantiate; it is blanked out before the
	 * arguments are handed on to {@link #runFilter(Filter, String[])}. Any
	 * exception is reported on stderr (stack trace followed by the message).
	 * 
	 * @param args
	 *            should contain arguments to the filter: use -h for help
	 */
	public static void main(String[] args) {

		try {
			if (args.length < 1) {
				throw new Exception(
						"First argument must be the class name of a Filter");
			}

			String filterClassName = args[0];
			Class filterClass = Class.forName(filterClassName);
			Filter instance = (Filter) filterClass.newInstance();

			// the class name is not an option, hide it from the option parsing
			args[0] = "";
			runFilter(instance, args);
		} catch (Exception failure) {
			failure.printStackTrace();
			System.err.println(failure.getMessage());
		}
	}
}
